{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the digits dataset and evolutionary_search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((1797, 64), (1797,))"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from evolutionary_search import EvolutionaryAlgorithmSearchCV\n",
    "import sklearn.datasets\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "data = sklearn.datasets.load_digits()\n",
    "X = data[\"data\"]\n",
    "y = data[\"target\"]\n",
    "\n",
    "# make it a 2-class problem by only classifying the digit \"5\" vs the rest\n",
    "y = np.array([1 if yy == 5 else 0 for yy in y])\n",
    "\n",
    "X.shape, y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV\n",
    "from sklearn.svm import SVC"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train an SVM with RBF kernel"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using conventional GridSearchCV"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Parameter grid: 625 parameter combinations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size:  625\n"
     ]
    }
   ],
   "source": [
    "paramgrid = {\"kernel\": [\"rbf\"],\n",
    "             \"C\"     : np.logspace(-9, 9, num=25, base=10),\n",
    "             \"gamma\" : np.logspace(-9, 9, num=25, base=10)}\n",
    "print(\"Size: \", len(paramgrid[\"kernel\"])*len(paramgrid[\"C\"])*len(paramgrid[\"gamma\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 2 folds for each of 625 candidates, totalling 1250 fits\n",
      "Wall time: 3min 31s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done 1250 out of 1250 | elapsed:  3.5min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=StratifiedKFold(n_splits=2, random_state=None, shuffle=False),\n",
       "       error_score='raise',\n",
       "       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
       "  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n",
       "  max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
       "  tol=0.001, verbose=False),\n",
       "       fit_params=None, iid=True, n_jobs=1,\n",
       "       param_grid={'C': array([  1.00000e-09,   5.62341e-09,   3.16228e-08,   1.77828e-07,\n",
       "         1.00000e-06,   5.62341e-06,   3.16228e-05,   1.77828e-04,\n",
       "         1.00000e-03,   5.62341e-03,   3.16228e-02,   1.77828e-01,\n",
       "         1.00000e+00,   5.62341e+00,   3.16228e+01,   1.77828e+02,\n",
       "         1.0000...7828e+05,\n",
       "         1.00000e+06,   5.62341e+06,   3.16228e+07,   1.77828e+08,\n",
       "         1.00000e+09])},\n",
       "       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n",
       "       scoring='accuracy', verbose=1)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cv = GridSearchCV(estimator=SVC(),\n",
    "                  param_grid=paramgrid,\n",
    "                  scoring=\"accuracy\",\n",
    "                  cv=StratifiedKFold(n_splits=2),\n",
    "                  verbose=1)\n",
    "%time cv.fit(X, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Best score + params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.98942682248191427, {'C': 1.0, 'gamma': 0.001, 'kernel': 'rbf'})"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cv.best_score_, cv.best_params_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An example of the \"cannonical\" cv_results_ table in sklearn:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean_fit_time</th>\n",
       "      <th>mean_score_time</th>\n",
       "      <th>mean_test_score</th>\n",
       "      <th>mean_train_score</th>\n",
       "      <th>param_C</th>\n",
       "      <th>param_gamma</th>\n",
       "      <th>param_kernel</th>\n",
       "      <th>params</th>\n",
       "      <th>rank_test_score</th>\n",
       "      <th>split0_test_score</th>\n",
       "      <th>split0_train_score</th>\n",
       "      <th>split1_test_score</th>\n",
       "      <th>split1_train_score</th>\n",
       "      <th>std_fit_time</th>\n",
       "      <th>std_score_time</th>\n",
       "      <th>std_test_score</th>\n",
       "      <th>std_train_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>308</th>\n",
       "      <td>0.011508</td>\n",
       "      <td>0.009006</td>\n",
       "      <td>0.989427</td>\n",
       "      <td>0.999444</td>\n",
       "      <td>1</td>\n",
       "      <td>0.001</td>\n",
       "      <td>rbf</td>\n",
       "      <td>{'C': 1.0, 'gamma': 0.001, 'kernel': 'rbf'}</td>\n",
       "      <td>1</td>\n",
       "      <td>0.986652</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.992205</td>\n",
       "      <td>0.998888</td>\n",
       "      <td>0.000501</td>\n",
       "      <td>0.001002</td>\n",
       "      <td>0.002777</td>\n",
       "      <td>0.000556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>358</th>\n",
       "      <td>0.015010</td>\n",
       "      <td>0.012015</td>\n",
       "      <td>0.988870</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>31.6228</td>\n",
       "      <td>0.001</td>\n",
       "      <td>rbf</td>\n",
       "      <td>{'C': 31.6227766017, 'gamma': 0.001, 'kernel':...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.986652</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.991091</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.001001</td>\n",
       "      <td>0.000008</td>\n",
       "      <td>0.002220</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>483</th>\n",
       "      <td>0.014009</td>\n",
       "      <td>0.011508</td>\n",
       "      <td>0.988870</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>177828</td>\n",
       "      <td>0.001</td>\n",
       "      <td>rbf</td>\n",
       "      <td>{'C': 177827.941004, 'gamma': 0.001, 'kernel':...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.986652</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.991091</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.001001</td>\n",
       "      <td>0.000500</td>\n",
       "      <td>0.002220</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>458</th>\n",
       "      <td>0.010507</td>\n",
       "      <td>0.009515</td>\n",
       "      <td>0.988870</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>31622.8</td>\n",
       "      <td>0.001</td>\n",
       "      <td>rbf</td>\n",
       "      <td>{'C': 31622.7766017, 'gamma': 0.001, 'kernel':...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.986652</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.991091</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000483</td>\n",
       "      <td>0.000509</td>\n",
       "      <td>0.002220</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>408</th>\n",
       "      <td>0.016511</td>\n",
       "      <td>0.011507</td>\n",
       "      <td>0.988870</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1000</td>\n",
       "      <td>0.001</td>\n",
       "      <td>rbf</td>\n",
       "      <td>{'C': 1000.0, 'gamma': 0.001, 'kernel': 'rbf'}</td>\n",
       "      <td>2</td>\n",
       "      <td>0.986652</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.991091</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.001501</td>\n",
       "      <td>0.000500</td>\n",
       "      <td>0.002220</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     mean_fit_time  mean_score_time  mean_test_score  mean_train_score  \\\n",
       "308       0.011508         0.009006         0.989427          0.999444   \n",
       "358       0.015010         0.012015         0.988870          1.000000   \n",
       "483       0.014009         0.011508         0.988870          1.000000   \n",
       "458       0.010507         0.009515         0.988870          1.000000   \n",
       "408       0.016511         0.011507         0.988870          1.000000   \n",
       "\n",
       "     param_C param_gamma param_kernel  \\\n",
       "308        1       0.001          rbf   \n",
       "358  31.6228       0.001          rbf   \n",
       "483   177828       0.001          rbf   \n",
       "458  31622.8       0.001          rbf   \n",
       "408     1000       0.001          rbf   \n",
       "\n",
       "                                                params  rank_test_score  \\\n",
       "308        {'C': 1.0, 'gamma': 0.001, 'kernel': 'rbf'}                1   \n",
       "358  {'C': 31.6227766017, 'gamma': 0.001, 'kernel':...                2   \n",
       "483  {'C': 177827.941004, 'gamma': 0.001, 'kernel':...                2   \n",
       "458  {'C': 31622.7766017, 'gamma': 0.001, 'kernel':...                2   \n",
       "408     {'C': 1000.0, 'gamma': 0.001, 'kernel': 'rbf'}                2   \n",
       "\n",
       "     split0_test_score  split0_train_score  split1_test_score  \\\n",
       "308           0.986652                 1.0           0.992205   \n",
       "358           0.986652                 1.0           0.991091   \n",
       "483           0.986652                 1.0           0.991091   \n",
       "458           0.986652                 1.0           0.991091   \n",
       "408           0.986652                 1.0           0.991091   \n",
       "\n",
       "     split1_train_score  std_fit_time  std_score_time  std_test_score  \\\n",
       "308            0.998888      0.000501        0.001002        0.002777   \n",
       "358            1.000000      0.001001        0.000008        0.002220   \n",
       "483            1.000000      0.001001        0.000500        0.002220   \n",
       "458            1.000000      0.000483        0.000509        0.002220   \n",
       "408            1.000000      0.001501        0.000500        0.002220   \n",
       "\n",
       "     std_train_score  \n",
       "308         0.000556  \n",
       "358         0.000000  \n",
       "483         0.000000  \n",
       "458         0.000000  \n",
       "408         0.000000  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(cv.cv_results_).sort_values(\"mean_test_score\", ascending=False).head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using RandomizedSearchCV"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Same parameter space, but only test 250 random combinations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 2 folds for each of 250 candidates, totalling 500 fits\n",
      "Wall time: 1min 19s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  1.3min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "RandomizedSearchCV(cv=StratifiedKFold(n_splits=2, random_state=None, shuffle=False),\n",
       "          error_score='raise',\n",
       "          estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
       "  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n",
       "  max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
       "  tol=0.001, verbose=False),\n",
       "          fit_params=None, iid=True, n_iter=250, n_jobs=1,\n",
       "          param_distributions={'C': array([  1.00000e-09,   5.62341e-09,   3.16228e-08,   1.77828e-07,\n",
       "         1.00000e-06,   5.62341e-06,   3.16228e-05,   1.77828e-04,\n",
       "         1.00000e-03,   5.62341e-03,   3.16228e-02,   1.77828e-01,\n",
       "         1.00000e+00,   5.62341e+00,   3.16228e+01,   1.77828e+02,\n",
       "      ...7828e+05,\n",
       "         1.00000e+06,   5.62341e+06,   3.16228e+07,   1.77828e+08,\n",
       "         1.00000e+09])},\n",
       "          pre_dispatch='2*n_jobs', random_state=None, refit=True,\n",
       "          return_train_score=True, scoring='accuracy', verbose=1)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cv = RandomizedSearchCV(estimator=SVC(),\n",
    "                        param_distributions=paramgrid,\n",
    "                        n_iter=250,\n",
    "                        scoring=\"accuracy\",\n",
    "                        cv=StratifiedKFold(n_splits=2),\n",
    "                        verbose=1)\n",
    "%time cv.fit(X, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Best score + params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.98942682248191427, {'C': 1.0, 'gamma': 0.001, 'kernel': 'rbf'})"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cv.best_score_, cv.best_params_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An example of the \"cannonical\" cv_results_ table in sklearn:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean_fit_time</th>\n",
       "      <th>mean_score_time</th>\n",
       "      <th>mean_test_score</th>\n",
       "      <th>mean_train_score</th>\n",
       "      <th>param_C</th>\n",
       "      <th>param_gamma</th>\n",
       "      <th>param_kernel</th>\n",
       "      <th>params</th>\n",
       "      <th>rank_test_score</th>\n",
       "      <th>split0_test_score</th>\n",
       "      <th>split0_train_score</th>\n",
       "      <th>split1_test_score</th>\n",
       "      <th>split1_train_score</th>\n",
       "      <th>std_fit_time</th>\n",
       "      <th>std_score_time</th>\n",
       "      <th>std_test_score</th>\n",
       "      <th>std_train_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>0.011499</td>\n",
       "      <td>0.009007</td>\n",
       "      <td>0.989427</td>\n",
       "      <td>0.999444</td>\n",
       "      <td>1</td>\n",
       "      <td>0.001</td>\n",
       "      <td>rbf</td>\n",
       "      <td>{'C': 1.0, 'kernel': 'rbf', 'gamma': 0.001}</td>\n",
       "      <td>1</td>\n",
       "      <td>0.986652</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.992205</td>\n",
       "      <td>0.998888</td>\n",
       "      <td>0.000491</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.002777</td>\n",
       "      <td>0.000556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220</th>\n",
       "      <td>0.010001</td>\n",
       "      <td>0.008000</td>\n",
       "      <td>0.988870</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1000</td>\n",
       "      <td>0.001</td>\n",
       "      <td>rbf</td>\n",
       "      <td>{'C': 1000.0, 'kernel': 'rbf', 'gamma': 0.001}</td>\n",
       "      <td>2</td>\n",
       "      <td>0.986652</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.991091</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.002001</td>\n",
       "      <td>3.576279e-07</td>\n",
       "      <td>0.002220</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>169</th>\n",
       "      <td>0.010010</td>\n",
       "      <td>0.008000</td>\n",
       "      <td>0.988870</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>5623.41</td>\n",
       "      <td>0.001</td>\n",
       "      <td>rbf</td>\n",
       "      <td>{'C': 5623.4132519, 'kernel': 'rbf', 'gamma': ...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.986652</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.991091</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.001991</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.002220</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>181</th>\n",
       "      <td>0.012018</td>\n",
       "      <td>0.007991</td>\n",
       "      <td>0.988870</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>177828</td>\n",
       "      <td>0.001</td>\n",
       "      <td>rbf</td>\n",
       "      <td>{'C': 177827.941004, 'kernel': 'rbf', 'gamma':...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.986652</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.991091</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000002</td>\n",
       "      <td>8.940697e-06</td>\n",
       "      <td>0.002220</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142</th>\n",
       "      <td>0.014510</td>\n",
       "      <td>0.011501</td>\n",
       "      <td>0.988870</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.62341</td>\n",
       "      <td>0.001</td>\n",
       "      <td>rbf</td>\n",
       "      <td>{'C': 5.6234132519, 'kernel': 'rbf', 'gamma': ...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.986652</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.991091</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000501</td>\n",
       "      <td>4.922152e-04</td>\n",
       "      <td>0.002220</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     mean_fit_time  mean_score_time  mean_test_score  mean_train_score  \\\n",
       "43        0.011499         0.009007         0.989427          0.999444   \n",
       "220       0.010001         0.008000         0.988870          1.000000   \n",
       "169       0.010010         0.008000         0.988870          1.000000   \n",
       "181       0.012018         0.007991         0.988870          1.000000   \n",
       "142       0.014510         0.011501         0.988870          1.000000   \n",
       "\n",
       "     param_C param_gamma param_kernel  \\\n",
       "43         1       0.001          rbf   \n",
       "220     1000       0.001          rbf   \n",
       "169  5623.41       0.001          rbf   \n",
       "181   177828       0.001          rbf   \n",
       "142  5.62341       0.001          rbf   \n",
       "\n",
       "                                                params  rank_test_score  \\\n",
       "43         {'C': 1.0, 'kernel': 'rbf', 'gamma': 0.001}                1   \n",
       "220     {'C': 1000.0, 'kernel': 'rbf', 'gamma': 0.001}                2   \n",
       "169  {'C': 5623.4132519, 'kernel': 'rbf', 'gamma': ...                2   \n",
       "181  {'C': 177827.941004, 'kernel': 'rbf', 'gamma':...                2   \n",
       "142  {'C': 5.6234132519, 'kernel': 'rbf', 'gamma': ...                2   \n",
       "\n",
       "     split0_test_score  split0_train_score  split1_test_score  \\\n",
       "43            0.986652                 1.0           0.992205   \n",
       "220           0.986652                 1.0           0.991091   \n",
       "169           0.986652                 1.0           0.991091   \n",
       "181           0.986652                 1.0           0.991091   \n",
       "142           0.986652                 1.0           0.991091   \n",
       "\n",
       "     split1_train_score  std_fit_time  std_score_time  std_test_score  \\\n",
       "43             0.998888      0.000491    0.000000e+00        0.002777   \n",
       "220            1.000000      0.002001    3.576279e-07        0.002220   \n",
       "169            1.000000      0.001991    0.000000e+00        0.002220   \n",
       "181            1.000000      0.000002    8.940697e-06        0.002220   \n",
       "142            1.000000      0.000501    4.922152e-04        0.002220   \n",
       "\n",
       "     std_train_score  \n",
       "43          0.000556  \n",
       "220         0.000000  \n",
       "169         0.000000  \n",
       "181         0.000000  \n",
       "142         0.000000  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(cv.cv_results_).sort_values(\"mean_test_score\", ascending=False).head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using EvolutionaryAlgorithmSearchCV"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Again same parameter space, optimize for 10 generations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Types [2, 1, 2] and maxint [24, 0, 24] detected\n",
      "--- Evolve in 625 possible combinations ---\n",
      "gen\tnevals\tavg     \tmin    \tmax    \tstd      \n",
      "0  \t50    \t0.910373\t0.89872\t0.98887\t0.0276682\n",
      "1  \t29    \t0.925732\t0.89872\t0.98887\t0.0376028\n",
      "2  \t27    \t0.950083\t0.89872\t0.98887\t0.0394634\n",
      "3  \t23    \t0.978186\t0.89872\t0.98887\t0.0238026\n",
      "4  \t29    \t0.988715\t0.984975\t0.98887\t0.000763336\n",
      "5  \t32    \t0.98887 \t0.98887 \t0.98887\t1.11022e-16\n",
      "6  \t30    \t0.988726\t0.981636\t0.98887\t0.0010128  \n",
      "7  \t36    \t0.983294\t0.89872 \t0.98887\t0.0213992  \n",
      "8  \t34    \t0.986945\t0.89872 \t0.98887\t0.0126326  \n",
      "9  \t26    \t0.988614\t0.979967\t0.98887\t0.00135036 \n",
      "10 \t37    \t0.986989\t0.89872 \t0.98887\t0.0126217  \n",
      "Best individual is: {'C': 1000000.0, 'kernel': 'rbf', 'gamma': 0.001}\n",
      "with fitness: 0.9888703394546466\n",
      "Wall time: 17.6 s\n"
     ]
    }
   ],
   "source": [
    "if __name__==\"__main__\":\n",
    "    #pool = Pool(4)\n",
    "    cv = EvolutionaryAlgorithmSearchCV(estimator=SVC(),\n",
    "                                       params=paramgrid,\n",
    "                                       scoring=\"accuracy\",\n",
    "                                       cv=StratifiedKFold(n_splits=2),\n",
    "                                       verbose=True,\n",
    "                                       population_size=50,\n",
    "                                       gene_mutation_prob=0.10,\n",
    "                                       tournament_size=3,\n",
    "                                       generations_number=10)\n",
    "                                       #pmap = pool.map)\n",
    "    %time cv.fit(X, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Best score + params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.9888703394546466, {'C': 1000000.0, 'gamma': 0.001, 'kernel': 'rbf'})"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cv.best_score_, cv.best_params_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Our cv_results_ table (note, includes all individuals with their mean, max, min, and std test score)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>max_test_score</th>\n",
       "      <th>mean_test_score</th>\n",
       "      <th>min_test_score</th>\n",
       "      <th>nan_test_score?</th>\n",
       "      <th>param_index</th>\n",
       "      <th>params</th>\n",
       "      <th>std_test_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>151</th>\n",
       "      <td>226</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>{'C': 1000000.0, 'kernel': 'rbf', 'gamma': 0.001}</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>208</th>\n",
       "      <td>293</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>{'C': 1000000.0, 'kernel': 'rbf', 'gamma': 0.001}</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>206</th>\n",
       "      <td>291</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>{'C': 1000000.0, 'kernel': 'rbf', 'gamma': 0.001}</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>205</th>\n",
       "      <td>290</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>{'C': 1000000.0, 'kernel': 'rbf', 'gamma': 100...</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>284</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>0.98887</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>{'C': 1000000.0, 'kernel': 'rbf', 'gamma': 0.001}</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     index  max_test_score  mean_test_score  min_test_score  nan_test_score?  \\\n",
       "151    226         0.98887          0.98887         0.98887            False   \n",
       "208    293         0.98887          0.98887         0.98887            False   \n",
       "206    291         0.98887          0.98887         0.98887            False   \n",
       "205    290         0.98887          0.98887         0.98887            False   \n",
       "204    284         0.98887          0.98887         0.98887            False   \n",
       "\n",
       "     param_index                                             params  \\\n",
       "151            0  {'C': 1000000.0, 'kernel': 'rbf', 'gamma': 0.001}   \n",
       "208            0  {'C': 1000000.0, 'kernel': 'rbf', 'gamma': 0.001}   \n",
       "206            0  {'C': 1000000.0, 'kernel': 'rbf', 'gamma': 0.001}   \n",
       "205            0  {'C': 1000000.0, 'kernel': 'rbf', 'gamma': 100...   \n",
       "204            0  {'C': 1000000.0, 'kernel': 'rbf', 'gamma': 0.001}   \n",
       "\n",
       "     std_test_score  \n",
       "151             0.0  \n",
       "208             0.0  \n",
       "206             0.0  \n",
       "205             0.0  \n",
       "204             0.0  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(cv.cv_results_).sort_values(\"mean_test_score\", ascending=False).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
